from typing import List, Union

import sacremoses


# Module-level singletons shared by preprocess()/postprocess(); constructing
# them once avoids re-loading the Moses rules on every call.
tokenizer = sacremoses.MosesTokenizer()
detokenizer = sacremoses.MosesDetokenizer()
# Punctuation normalizer with the unicode-punctuation pre-replacement and
# control-character stripping steps explicitly disabled, so only the core
# Moses normalization rules are applied.
normalizer = sacremoses.MosesPunctNormalizer(
    pre_replace_unicode_punct=False,
    post_remove_control_chars=False,
)


def preprocess(text: str, return_str: bool = True) -> Union[str, List[str]]:
    """Normalize punctuation and tokenize *text* with Moses.

    Args:
        text: Raw input text.
        return_str: If True (default), return the tokens joined into a
            single space-separated string; if False, return the token list.

    Returns:
        The tokenized text as a string, or a list of tokens when
        ``return_str`` is False.
    """
    # Normalize punctuation (quotes, dashes, ...) before tokenizing so the
    # tokenizer sees canonical characters.
    text = normalizer.normalize(text)
    # MosesTokenizer.tokenize returns a str or a list depending on
    # return_str, hence the Union return type.
    return tokenizer.tokenize(text, return_str=return_str)


def postprocess(tokens: List[str]) -> str:
    """Detokenize Moses tokens back into plain text.

    The Moses tokenizer escapes special characters (e.g. ``&`` ->
    ``&amp;``) and splits punctuation into separate tokens; a plain
    ``" ".join`` leaves both artifacts in the output. Using the
    detokenizer makes ``postprocess(preprocess(text, return_str=False))``
    round-trip cleanly.

    Args:
        tokens: Tokens produced by the Moses tokenizer.

    Returns:
        The detokenized text as a single string.
    """
    return detokenizer.detokenize(tokens)
